Clear workspace

rm(list = ls())
library(bigrquery)
library(stringr)
library(tidyverse)
library(dplyr)
library(lme4)
library(MuMIn)
library(scales)
# Ask the gcloud CLI for the project list and take the first space-delimited
# token of the second output line as the project id.
# NOTE(review): presumably line 1 is the table header row - confirm.
response <- try(
  system("~/google-cloud-sdk/bin/gcloud projects list --quiet", intern = TRUE)
)
if (inherits(response, "try-error") || length(response) < 2) {
  # Keep going with a missing id (as before), but say so: datasets that are
  # already cached as CSV can still be loaded without querying BigQuery.
  warning("Could not determine a project id from gcloud", call. = FALSE)
  projectid <- NA_character_
} else {
  projectid <- strsplit(response[2], " ")[[1]][1]
}
# Run the hotspot metrics query for one pool on BigQuery and download the
# result.
#
# poolname: text substituted for every ##POOL_NAME## placeholder in the query.
#
# Uses the global `projectid` as the billing project and prints the final SQL
# before running it. Returns the downloaded query result.
create_dataset <- function(poolname) {
  placeholder <- "##POOL_NAME##"
  template <- "SELECT 
    ##POOL_NAME##.percentage_of_regional_pool_present,
    ##POOL_NAME##.difference_from_locality_trait_gravity,
    ##POOL_NAME##.percentage_of_niches_present,
    ##POOL_NAME##.percentage_of_niches_2_present,
    ##POOL_NAME##.percentage_of_niches_3_present,
    latitude,
    longitude,
    percentage_landcover_5km.closed_forest_total AS closed_forest,
    percentage_landcover_5km.cultivated,
    percentage_landcover_5km.herbaceous_vegetation,
    percentage_landcover_5km.herbaceous_wetland,
    percentage_landcover_5km.open_forest_total AS open_forest,
    percentage_landcover_5km.permanent_water,
    percentage_landcover_5km.shrubs,
    percentage_landcover_5km.urban,
    percentage_landcover_5km.elevation.mean AS mean_elevation,
    percentage_landcover_5km.elevation.delta AS elevation_delta,
    average_population_density.within_5km AS average_population_density,
    urban_area.name AS city_name,
    urban_area.location.continent,
    urban_area.ecosystem.realm,
    urban_area.ecosystem.biome.biome_name AS biome,
    urban_area.country_economy.gdp_estimate_thousand_dollars_per_person AS national_gdp_estimate_thousand_dollars_per_person,
    urban_area.country_economy.income_group AS national_income_group,
    locality_id
FROM model.urban_hotspot
JOIN model2.all_species USING(locality_id, city_id)
JOIN model.urban_area USING (city_id)"

  # Fill in the pool name and echo the query so it shows up in the notebook
  query <- str_replace_all(template, placeholder, poolname)
  print(query)

  result_table <- bq_project_query(projectid, query)
  bq_table_download(result_table)
}
# Load the hotspot metrics for one pool as a model-ready data frame.
#
# The query result is cached in 'hotspot_metrics_<poolname>.csv' in the working
# directory; create_dataset() is only called when that file does not exist.
# The data are always read back from the CSV, so a fresh download and a cached
# run go through the same parsing.
#
# poolname: pool identifier, used in the cache file name and passed on to
#   create_dataset().
#
# Returns the data with city_name, continent, realm, biome and
# national_income_group as factors (reference levels "Europe" and
# "Palearctic"), plus *_scaled columns rescaled to [0, 1].
load_dataset <- function(poolname) {
  filename <- str_replace('hotspot_metrics_##POOL_NAME##.csv', '##POOL_NAME##', poolname)

  if (!file.exists(filename)) {
    write_csv(create_dataset(poolname), filename)
  }

  data <- read_csv(filename)

  data$city_name <- as.factor(data$city_name)
  data$continent <- relevel(as.factor(data$continent), ref = "Europe")
  data$realm <- relevel(as.factor(data$realm), ref = "Palearctic")
  data$biome <- as.factor(data$biome)
  data$national_income_group <- as.factor(data$national_income_group)

  # Min-max scale to [0, 1] using the finite, non-missing range of the values
  scale_unit <- function(x) {
    rescale(x, to = c(0, 1), from = range(x, na.rm = TRUE, finite = TRUE))
  }

  data$mean_elevation_scaled <- scale_unit(data$mean_elevation)
  data$elevation_delta_scaled <- scale_unit(data$elevation_delta)
  data$average_population_density_scaled <- scale_unit(data$average_population_density)
  data$national_gdp_estimate_thousand_dollars_per_person_scaled <-
    scale_unit(data$national_gdp_estimate_thousand_dollars_per_person)
  data$latitude_scaled <- scale_unit(data$latitude)
  data$longitude_scaled <- scale_unit(data$longitude)

  # Scale the absolute latitude itself. Taking abs() of latitude_scaled was a
  # no-op, because that column already lies within [0, 1].
  data$absolute_latitude_scaled <- scale_unit(abs(data$latitude))

  data
}

Merlin

# Hotspot metrics for the 'merlin' pool (queried once, then read from the cached CSV)
merlin <- load_dataset('merlin')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  city_name = col_character(),
  continent = col_character(),
  realm = col_character(),
  biome = col_character(),
  national_income_group = col_character(),
  locality_id = col_character()
)
ℹ Use `spec()` for the full column specifications.
merlin

Birdlife

# Hotspot metrics for the 'birdlife' pool (queried once, then read from the cached CSV)
birdlife <- load_dataset('birdlife')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  city_name = col_character(),
  continent = col_character(),
  realm = col_character(),
  biome = col_character(),
  national_income_group = col_character(),
  locality_id = col_character()
)
ℹ Use `spec()` for the full column specifications.
birdlife

Both

# Hotspot metrics for the 'both' pool (queried once, then read from the cached CSV)
both <- load_dataset('both')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  city_name = col_character(),
  continent = col_character(),
  realm = col_character(),
  biome = col_character(),
  national_income_group = col_character(),
  locality_id = col_character()
)
ℹ Use `spec()` for the full column specifications.
both

Either

# Hotspot metrics for the 'either' pool (queried once, then read from the cached CSV)
either <- load_dataset('either')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  city_name = col_character(),
  continent = col_character(),
  realm = col_character(),
  biome = col_character(),
  national_income_group = col_character(),
  locality_id = col_character()
)
ℹ Use `spec()` for the full column specifications.
either
# Linear population trend of a single city.
#
# city_row: one row (or list) holding pop1950, pop1955, ..., pop2020.
#
# Returns the slope of a least-squares fit of population against year, i.e.
# the fitted population change per year, as a length-1 vector named "years".
population_growth <- function(city_row) {
  years <- seq(1950, 2020, by = 5)
  # Column names follow the pattern pop<year>
  population <- unlist(
    lapply(paste0("pop", years), function(column) city_row[[column]]),
    use.names = FALSE
  )

  fit <- lm(population ~ years)
  coef(fit)[2]
}
# Read the per-city table from 'city_data.csv' in the working directory
city_data <- read_csv('city_data.csv')

── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
  .default = col_double(),
  name = col_character(),
  city_includes_estuary = col_logical(),
  region_100km_includes_estuary = col_logical(),
  region_50km_includes_estuary = col_logical(),
  region_20km_includes_estuary = col_logical(),
  biome_name = col_character(),
  realm = col_character()
)
ℹ Use `spec()` for the full column specifications.
# Convert the categorical and estuary-flag columns to factors
factor_columns <- c(
  "realm",
  "city_includes_estuary",
  "region_100km_includes_estuary",
  "region_50km_includes_estuary",
  "region_20km_includes_estuary",
  "biome_name"
)
city_data[factor_columns] <- lapply(city_data[factor_columns], as.factor)

# Fitted population change per year for every city. seq_len() keeps this safe
# for an empty table, where 1:nrow() would iterate over c(1, 0).
city_data$population_growth <- vapply(
  seq_len(nrow(city_data)),
  function(i) unname(population_growth(city_data[i, ])),
  numeric(1)
)

city_data
LS0tCnRpdGxlOiAiRG93bmxvYWQgbG9jYWxpdHkgZGF0YSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKQ2xlYXIgd29ya3NwYWNlCmBgYHtyfQpybShsaXN0ID0gbHMoKSkKYGBgCgpgYGB7cn0KbGlicmFyeShiaWdycXVlcnkpCmxpYnJhcnkoc3RyaW5ncikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkobG1lNCkKbGlicmFyeShNdU1JbikKbGlicmFyeShzY2FsZXMpCmBgYAoKYGBge3J9CnJlc3BvbnNlIDwtIHRyeShzeXN0ZW0oJ34vZ29vZ2xlLWNsb3VkLXNkay9iaW4vZ2Nsb3VkIHByb2plY3RzIGxpc3QgLS1xdWlldCcsIGludGVybiA9IFQpKQpwcm9qZWN0aWQgPC0gc3Ryc3BsaXQocmVzcG9uc2VbMl0sICIgIilbWzFdXVsxXQpgYGAKICAKCmBgYHtyfQpjcmVhdGVfZGF0YXNldCA8LSBmdW5jdGlvbihwb29sbmFtZSkgewogIHNxbCA8LSBzdHJfcmVwbGFjZV9hbGwoIlNFTEVDVCAKICAgICMjUE9PTF9OQU1FIyMucGVyY2VudGFnZV9vZl9yZWdpb25hbF9wb29sX3ByZXNlbnQsCiAgICAjI1BPT0xfTkFNRSMjLmRpZmZlcmVuY2VfZnJvbV9sb2NhbGl0eV90cmFpdF9ncmF2aXR5LAogICAgIyNQT09MX05BTUUjIy5wZXJjZW50YWdlX29mX25pY2hlc19wcmVzZW50LAogICAgIyNQT09MX05BTUUjIy5wZXJjZW50YWdlX29mX25pY2hlc18yX3ByZXNlbnQsCiAgICAjI1BPT0xfTkFNRSMjLnBlcmNlbnRhZ2Vfb2ZfbmljaGVzXzNfcHJlc2VudCwKICAgIGxhdGl0dWRlLAogICAgbG9uZ2l0dWRlLAogICAgcGVyY2VudGFnZV9sYW5kY292ZXJfNWttLmNsb3NlZF9mb3Jlc3RfdG90YWwgQVMgY2xvc2VkX2ZvcmVzdCwKICAgIHBlcmNlbnRhZ2VfbGFuZGNvdmVyXzVrbS5jdWx0aXZhdGVkLAogICAgcGVyY2VudGFnZV9sYW5kY292ZXJfNWttLmhlcmJhY2VvdXNfdmVnZXRhdGlvbiwKICAgIHBlcmNlbnRhZ2VfbGFuZGNvdmVyXzVrbS5oZXJiYWNlb3VzX3dldGxhbmQsCiAgICBwZXJjZW50YWdlX2xhbmRjb3Zlcl81a20ub3Blbl9mb3Jlc3RfdG90YWwgQVMgb3Blbl9mb3Jlc3QsCiAgICBwZXJjZW50YWdlX2xhbmRjb3Zlcl81a20ucGVybWFuZW50X3dhdGVyLAogICAgcGVyY2VudGFnZV9sYW5kY292ZXJfNWttLnNocnVicywKICAgIHBlcmNlbnRhZ2VfbGFuZGNvdmVyXzVrbS51cmJhbiwKICAgIHBlcmNlbnRhZ2VfbGFuZGNvdmVyXzVrbS5lbGV2YXRpb24ubWVhbiBBUyBtZWFuX2VsZXZhdGlvbiwKICAgIHBlcmNlbnRhZ2VfbGFuZGNvdmVyXzVrbS5lbGV2YXRpb24uZGVsdGEgQVMgZWxldmF0aW9uX2RlbHRhLAogICAgYXZlcmFnZV9wb3B1bGF0aW9uX2RlbnNpdHkud2l0aGluXzVrbSBBUyBhdmVyYWdlX3BvcHVsYXRpb25fZGVuc2l0eSwKICAgIHVyYmFuX2FyZWEubmFtZSBBUyBjaXR5X25hbWUsCiAgICB1cmJhbl9hcmVhLmxvY2F0aW9uLmNvbnRpbmVudCwKICAgIHVyYmFuX2FyZWEuZWNvc3lzdGVtLnJlYWxtLAogICAgdXJiYW5fYXJlYS5lY29zeXN0ZW0uYmlvbWUuYmlvbWVfbmFtZSBBUyBiaW9t
ZSwKICAgIHVyYmFuX2FyZWEuY291bnRyeV9lY29ub215LmdkcF9lc3RpbWF0ZV90aG91c2FuZF9kb2xsYXJzX3Blcl9wZXJzb24gQVMgbmF0aW9uYWxfZ2RwX2VzdGltYXRlX3Rob3VzYW5kX2RvbGxhcnNfcGVyX3BlcnNvbiwKICAgIHVyYmFuX2FyZWEuY291bnRyeV9lY29ub215LmluY29tZV9ncm91cCBBUyBuYXRpb25hbF9pbmNvbWVfZ3JvdXAsCiAgICBsb2NhbGl0eV9pZApGUk9NIG1vZGVsLnVyYmFuX2hvdHNwb3QKSk9JTiBtb2RlbDIuYWxsX3NwZWNpZXMgVVNJTkcobG9jYWxpdHlfaWQsIGNpdHlfaWQpCkpPSU4gbW9kZWwudXJiYW5fYXJlYSBVU0lORyAoY2l0eV9pZCkiLCAnIyNQT09MX05BTUUjIycsIHBvb2xuYW1lKQoKICBwcmludChzcWwpCiAgCiAgdGIgPC0gYnFfcHJvamVjdF9xdWVyeShwcm9qZWN0aWQsIHNxbCkKCiAgYnFfdGFibGVfZG93bmxvYWQodGIpCn0KYGBgCgoKYGBge3J9CmxvYWRfZGF0YXNldCA8LSBmdW5jdGlvbihwb29sbmFtZSkgewogIGZpbGVuYW1lIDwtIHN0cl9yZXBsYWNlKCdob3RzcG90X21ldHJpY3NfIyNQT09MX05BTUUjIy5jc3YnLCAnIyNQT09MX05BTUUjIycsIHBvb2xuYW1lKQogIAogIGlmICghZmlsZS5leGlzdHMoZmlsZW5hbWUpKSB7CiAgICBkYXRhIDwtIGNyZWF0ZV9kYXRhc2V0KHBvb2xuYW1lKQogICAgd3JpdGVfY3N2KGRhdGEsIGZpbGVuYW1lKQogIH0KICAKICAKICBkYXRhIDwtIHJlYWRfY3N2KGZpbGVuYW1lKQogIAogIGRhdGEkY2l0eV9uYW1lID0gYXMuZmFjdG9yKGRhdGEkY2l0eV9uYW1lKQogIGRhdGEkY29udGluZW50ID0gcmVsZXZlbChhcy5mYWN0b3IoZGF0YSRjb250aW5lbnQpLCByZWYgPSAiRXVyb3BlIikKICBkYXRhJHJlYWxtID0gcmVsZXZlbChhcy5mYWN0b3IoZGF0YSRyZWFsbSksIHJlZiA9ICJQYWxlYXJjdGljIikKICBkYXRhJGJpb21lID0gYXMuZmFjdG9yKGRhdGEkYmlvbWUpCiAgZGF0YSRuYXRpb25hbF9pbmNvbWVfZ3JvdXAgPSBhcy5mYWN0b3IoZGF0YSRuYXRpb25hbF9pbmNvbWVfZ3JvdXApCiAgZGF0YSRtZWFuX2VsZXZhdGlvbl9zY2FsZWQgPSByZXNjYWxlKGRhdGEkbWVhbl9lbGV2YXRpb24sIHRvID0gYygwLCAxKSwgZnJvbSA9IHJhbmdlKGRhdGEkbWVhbl9lbGV2YXRpb24sIG5hLnJtID0gVFJVRSwgZmluaXRlID0gVFJVRSkpCiAgZGF0YSRlbGV2YXRpb25fZGVsdGFfc2NhbGVkID0gcmVzY2FsZShkYXRhJGVsZXZhdGlvbl9kZWx0YSwgdG8gPSBjKDAsIDEpLCBmcm9tID0gcmFuZ2UoZGF0YSRlbGV2YXRpb25fZGVsdGEsIG5hLnJtID0gVFJVRSwgZmluaXRlID0gVFJVRSkpCiAgZGF0YSRhdmVyYWdlX3BvcHVsYXRpb25fZGVuc2l0eV9zY2FsZWQgPSByZXNjYWxlKGRhdGEkYXZlcmFnZV9wb3B1bGF0aW9uX2RlbnNpdHksIHRvID0gYygwLCAxKSwgZnJvbSA9IHJhbmdlKGRhdGEkYXZlcmFnZV9wb3B1bGF0aW9uX2RlbnNpdHksIG5hLnJtID0gVFJVRSwgZmluaXRlID0gVFJVRSkpCiAgZGF0YSRuYXRpb25hbF9nZHBfZXN0aW1hdGVfdGhv
dXNhbmRfZG9sbGFyc19wZXJfcGVyc29uX3NjYWxlZCA9IHJlc2NhbGUoZGF0YSRuYXRpb25hbF9nZHBfZXN0aW1hdGVfdGhvdXNhbmRfZG9sbGFyc19wZXJfcGVyc29uLCB0byA9IGMoMCwgMSksIGZyb20gPSByYW5nZShkYXRhJG5hdGlvbmFsX2dkcF9lc3RpbWF0ZV90aG91c2FuZF9kb2xsYXJzX3Blcl9wZXJzb24sIG5hLnJtID0gVFJVRSwgZmluaXRlID0gVFJVRSkpCiAgZGF0YSRsYXRpdHVkZV9zY2FsZWQgPSByZXNjYWxlKGRhdGEkbGF0aXR1ZGUsIHRvID0gYygwLCAxKSwgZnJvbSA9IHJhbmdlKGRhdGEkbGF0aXR1ZGUsIG5hLnJtID0gVFJVRSwgZmluaXRlID0gVFJVRSkpCiAgZGF0YSRsb25naXR1ZGVfc2NhbGVkID0gcmVzY2FsZShkYXRhJGxvbmdpdHVkZSwgdG8gPSBjKDAsIDEpLCBmcm9tID0gcmFuZ2UoZGF0YSRsb25naXR1ZGUsIG5hLnJtID0gVFJVRSwgZmluaXRlID0gVFJVRSkpCiAgZGF0YSRhYnNvbHV0ZV9sYXRpdHVkZV9zY2FsZWQgPSBhYnMoZGF0YSRsYXRpdHVkZV9zY2FsZWQpCiAgZGF0YQp9CmBgYAoKCk1lcmxpbgotLS0tLS0tLS0tLS0tLS0tLS0tLS0tCgpgYGB7cn0KbWVybGluIDwtIGxvYWRfZGF0YXNldCgnbWVybGluJykKbWVybGluCmBgYAoKQmlyZGxpZmUKLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQoKYGBge3J9CmJpcmRsaWZlIDwtIGxvYWRfZGF0YXNldCgnYmlyZGxpZmUnKQpiaXJkbGlmZQpgYGAKCgpCb3RoCi0tLS0tLS0tLS0tLS0tLS0tLS0tLS0KCmBgYHtyfQpib3RoIDwtIGxvYWRfZGF0YXNldCgnYm90aCcpCmJvdGgKYGBgCgpFaXRoZXIKLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQoKYGBge3J9CmVpdGhlciA8LSBsb2FkX2RhdGFzZXQoJ2VpdGhlcicpCmVpdGhlcgpgYGAKCmBgYHtyfQpwb3B1bGF0aW9uX2dyb3d0aCA8LSBmdW5jdGlvbihjaXR5X3JvdykgewogIHBvcHVsYXRpb24gPC0gYyhjaXR5X3JvdyRwb3AxOTUwLCBjaXR5X3JvdyRwb3AxOTU1LCBjaXR5X3JvdyRwb3AxOTYwLCBjaXR5X3JvdyRwb3AxOTY1LCBjaXR5X3JvdyRwb3AxOTcwLCBjaXR5X3JvdyRwb3AxOTc1LCBjaXR5X3JvdyRwb3AxOTgwLCBjaXR5X3JvdyRwb3AxOTg1LCBjaXR5X3JvdyRwb3AxOTkwLCBjaXR5X3JvdyRwb3AxOTk1LCBjaXR5X3JvdyRwb3AyMDAwLCBjaXR5X3JvdyRwb3AyMDA1LCBjaXR5X3JvdyRwb3AyMDEwLCBjaXR5X3JvdyRwb3AyMDE1LCBjaXR5X3JvdyRwb3AyMDIwKQogIHllYXJzIDwtIGMoMTk1MCwgMTk1NSwgMTk2MCwgMTk2NSwgMTk3MCwgMTk3NSwgMTk4MCwgMTk4NSwgMTk5MCwgMTk5NSwgMjAwMCwgMjAwNSwgMjAxMCwgMjAxNSwgMjAyMCkKICAKICBtb2RlbCA8LSBsbShwb3B1bGF0aW9uIH4geWVhcnMpCiAgbW9kZWwkY29lZmZpY2llbnRzWzJdCn0KYGBgCgoKYGBge3J9CmNpdHlfZGF0YSA8LSByZWFkX2NzdignY2l0eV9kYXRhLmNzdicpCmNpdHlfZGF0YSRyZWFsbSA8LSBhcy5mYWN0b3IoY2l0eV9kYXRhJHJlYWxtKQpjaXR5X2RhdGEkY2l0eV9pbmNsdWRlc19lc3R1YXJ5IDwtIGFz
LmZhY3RvcihjaXR5X2RhdGEkY2l0eV9pbmNsdWRlc19lc3R1YXJ5KQpjaXR5X2RhdGEkcmVnaW9uXzEwMGttX2luY2x1ZGVzX2VzdHVhcnkgPC0gYXMuZmFjdG9yKGNpdHlfZGF0YSRyZWdpb25fMTAwa21faW5jbHVkZXNfZXN0dWFyeSkKY2l0eV9kYXRhJHJlZ2lvbl81MGttX2luY2x1ZGVzX2VzdHVhcnkgPC0gYXMuZmFjdG9yKGNpdHlfZGF0YSRyZWdpb25fNTBrbV9pbmNsdWRlc19lc3R1YXJ5KQpjaXR5X2RhdGEkcmVnaW9uXzIwa21faW5jbHVkZXNfZXN0dWFyeSA8LSBhcy5mYWN0b3IoY2l0eV9kYXRhJHJlZ2lvbl8yMGttX2luY2x1ZGVzX2VzdHVhcnkpCmNpdHlfZGF0YSRiaW9tZV9uYW1lIDwtIGFzLmZhY3RvcihjaXR5X2RhdGEkYmlvbWVfbmFtZSkKCmNpdHlfZGF0YSRwb3B1bGF0aW9uX2dyb3d0aCA9IDAKCmZvcihpIGluIDE6bnJvdyhjaXR5X2RhdGEpKSB7CiAgICBjaXR5X2RhdGFbaSxdJHBvcHVsYXRpb25fZ3Jvd3RoID0gcG9wdWxhdGlvbl9ncm93dGgoY2l0eV9kYXRhW2ksXSkKfQoKY2l0eV9kYXRhCmBgYAoKCgo=